Set Up

Path

#path
myPath <- "/Users/tyler/Dropbox/_200CN/labs/week8/data"


#unzip (only once so we'll use an if)
if(!file.exists(file.path(myPath, 'rs/samples.rds'))) { 
  unzip(file.path(myPath, 'rsdata.zip'), exdir= myPath)
}

set.seed(520)

Libs

library(raster)
## Loading required package: sp
library(rasterVis)
## Loading required package: lattice
## Loading required package: latticeExtra
## Loading required package: RColorBrewer
library(rpart)
library(gridExtra)
library(dismo)
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:gridExtra':
## 
##     combine

Exploration

# first create a list of raster layers to use
filenames <- paste0(file.path(myPath,'rs/LC08_044034_20170614_B'), 1:11, ".tif")
head(filenames)
## [1] "/Users/tyler/Dropbox/_200CN/labs/week8/data/rs/LC08_044034_20170614_B1.tif"
## [2] "/Users/tyler/Dropbox/_200CN/labs/week8/data/rs/LC08_044034_20170614_B2.tif"
## [3] "/Users/tyler/Dropbox/_200CN/labs/week8/data/rs/LC08_044034_20170614_B3.tif"
## [4] "/Users/tyler/Dropbox/_200CN/labs/week8/data/rs/LC08_044034_20170614_B4.tif"
## [5] "/Users/tyler/Dropbox/_200CN/labs/week8/data/rs/LC08_044034_20170614_B5.tif"
## [6] "/Users/tyler/Dropbox/_200CN/labs/week8/data/rs/LC08_044034_20170614_B6.tif"
#create raster stack from the object `filenames`
landsat <- stack(filenames)

#now we have object `landsat` for q1
landsat
## class       : RasterStack 
## dimensions  : 1245, 1497, 1863765, 11  (nrow, ncol, ncell, nlayers)
## resolution  : 30, 30  (x, y)
## extent      : 594090, 639000, 4190190, 4227540  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=utm +zone=10 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0 
## names       : LC08_044034_20170614_B1, LC08_044034_20170614_B2, LC08_044034_20170614_B3, LC08_044034_20170614_B4, LC08_044034_20170614_B5, LC08_044034_20170614_B6, LC08_044034_20170614_B7, LC08_044034_20170614_B8, LC08_044034_20170614_B9, LC08_044034_20170614_B10, LC08_044034_20170614_B11 
## min values  :            9.641791e-02,            7.483990e-02,            4.259216e-02,            2.084067e-02,            8.457669e-04,           -7.872183e-03,           -5.052945e-03,            3.931751e-02,           -4.337332e-04,             2.897978e+02,             2.885000e+02 
## max values  :              0.73462820,              0.71775615,              0.69246972,              0.78617686,              1.01243150,              1.04320455,              1.11793602,              0.82673049,              0.03547901,             322.43139648,             317.99530029

Question 1

Use the plotRGB function with RasterStack landsat to create a true and false color composite (hint remember the position of the bands in the stack).

Get the plots ready

par(mfrow=c(1,2))
#plotRBG, specify our rgb layers from landsat raster
#r = 4, g = 3, b = 2, NIR = 5
plotRGB(landsat, r = 4, g = 3, b = 2, stretch = "lin", axes = TRUE, main="Landsat True Color Composite")

#this is the false color composit, with NIR, Red, Green replacing r, g, b
plotRGB(landsat, r = 5, g = 4, b = 3, stretch = "lin", main="Landsat False Color Composite", axes = TRUE)

par(mfrow=c(1,1))

landsat <- subset(landsat, 1:7)
names(landsat)
## [1] "LC08_044034_20170614_B1" "LC08_044034_20170614_B2"
## [3] "LC08_044034_20170614_B3" "LC08_044034_20170614_B4"
## [5] "LC08_044034_20170614_B5" "LC08_044034_20170614_B6"
## [7] "LC08_044034_20170614_B7"
names(landsat) <- c('ultra-blue', 'blue', 'green', 'red', 'NIR', 'SWIR1', 'SWIR2')
names(landsat)
## [1] "ultra.blue" "blue"       "green"      "red"        "NIR"       
## [6] "SWIR1"      "SWIR2"

Question 2

Interactive selection from the image is also possible. Use drawExtent and drawPoly to select an area of interest

#this code doesn't run in the knit but I'm showing it for reference
#here I created new plot with title, and ran the drawExtent function to pick my own extent
#I then exported the image which shows below
plotRGB(landsat, stretch = "lin", main = "drawExtent() Selection", axes = TRUE)
drawExtent()

#i did the same thing but then used drawPoly, which gives a polygon selection
plotRGB(landsat, stretch = "lin", main = "drawPoly() Selection", axes = TRUE)
drawPoly()

Above you can see the area selection in the red box from running the drawExtent() function.

Above you can see the selection in red from drawPoly(). It’s worth noting that these functions also be saved as objects or matrices, which we could then use to crop the raster.

Question 3

Use the RasterStack landsatcrop to create a true and false color composite

Create the landsatcrop object

# specify the extent we want to crop
e <- extent(624387, 635752, 4200047, 4210939)
# crop landsat by the extent
landsatcrop <- crop(landsat, e)

True / False Composite

par(mfrow=c(1,2))
#plotRBG, specify our rgb layers from landsat raster
#r = 4, g = 3, b = 2, NIR = 5
plotRGB(landsatcrop, r = 4, g = 3, b = 2, stretch = "lin", axes = TRUE, main="Extent True Color Composite")

#this is the false color composit, with NIR, Red, Green replacing r, g, b
plotRGB(landsatcrop, r = 5, g = 4, b = 3, stretch = "lin", main="Extent False Color Composite", axes = TRUE)

par(mfrow=c(1,1))

Extract pixel values

# load the polygons with land use land cover information
samp <- readRDS(file.path(myPath,'rs/samples.rds'))
# generate 300 point samples from the polygons
ptsamp <- spsample(samp, 300, type='regular')
# add the land cover class to the points
ptsamp$class <- over(ptsamp, samp)$class
# extract values with points
df <- extract(landsat, ptsamp)
# To see some of the reflectance values
head(df)
##      ultra.blue      blue     green       red       NIR     SWIR1
## [1,]  0.1398775 0.1246537 0.1123791 0.1166080 0.1830336 0.2308088
## [2,]  0.1369716 0.1216826 0.1077816 0.1119671 0.1886070 0.2424544
## [3,]  0.1382511 0.1210320 0.1054178 0.1045503 0.1843781 0.2125705
## [4,]  0.1320054 0.1296415 0.1386848 0.1915997 0.3490216 0.3433614
## [5,]  0.1270392 0.1194055 0.1209236 0.1628652 0.2960416 0.3274653
## [6,]  0.1465787 0.1490943 0.1713663 0.2310040 0.3592793 0.4000064
##          SWIR2
## [1,] 0.1914262
## [2,] 0.2089705
## [3,] 0.1724072
## [4,] 0.1989731
## [5,] 0.1905587
## [6,] 0.2531892

Spectral Profiles

ms <- aggregate(df, list(ptsamp$class), mean)
# instead of the first column, we use row names
rownames(ms) <- ms[,1]
ms <- ms[,-1]

which.max(ms[1,])
## NIR 
##   5
# Create a vector of color for the land cover classes for use in plotting
mycolor <- c('darkred', 'yellow', 'burlywood', 'cyan', 'blue')
#transform ms from a data.frame to a matrix
ms <- as.matrix(ms)
# First create an empty plot
plot(0, ylim=c(0,0.6), xlim = c(1,7), type='n', xlab="Bands", ylab = "Reflectance")
# add the different classes
for (i in 1:nrow(ms)){
  lines(ms[i,], type = "l", lwd = 3, lty = 1, col = mycolor[i])
}
# Title
title(main="Spectral Profile from Landsat", font.main = 2)
# Legend
legend("topleft", rownames(ms),
       cex=0.8, col=mycolor, lty = 1, lwd =3, bty = "n")

Basic Math

Question 1

Adapt the code shown above to compute indices to identify i) water and ii) built-up. Hint: Use the spectral profile plot to find the bands having maximum and minimum reflectance for these two classes

#` function that computes an index
#` @img is the raster image, @k and @i are the layers
vi <- function(img, k, i) {
  bk <- img[[k]] #get image layer of k 
  bi <- img[[i]] #get image layer of i
  vi <- (bk - bi) / (bk + bi) #the diff between k and i over the sum of k and i give the index
  return(vi) #return the index raster
}
#get max / min reflectance for built (1)
which.max(ms[1,]) #5 NIR
## NIR 
##   5
which.min(ms[1,]) #3 GREEN
## blue 
##    2
#get max / min reflectance for water (5)
which.max(ms[5,]) #1 ultrablue
## ultra.blue 
##          1
which.min(ms[5,]) #7 shortwave IR 2
## SWIR2 
##     7

Now we know which bands are the max and min for water and built-up layers. Let’s make the indicies.

# For Landsat NIR = 5, g = 3
#built index
ndbi <- vi(landsat, 5, 3)
plot(ndbi, col = rev(terrain.colors(10)), main = "Landsat-NDVI-Built")

# For Landsat ultra blue = 1, SWIR2 = 7
#water index
ndwi <- vi(landsat, 1, 7)
plot(ndwi, col = rev(terrain.colors(10)), main = "Landsat-NDVI-Water")

Question 2

Make histograms of the values the vegetation indices developed in question 1

# view histogram of data
min(ndbi)
## Warning in min(new("RasterLayer", file = new(".RasterFile", name = "/
## private/var/folders/2r/plhpcf711tv__rnpl26vqglh0000gn/T/Rtmpevlkx9/raster/
## r_tmp_2019-05-26_210733_37829_27734.grd", : Nothing to summarize if you
## provide a single RasterLayer; see cellStats
## class       : RasterLayer 
## dimensions  : 1245, 1497, 1863765  (nrow, ncol, ncell)
## resolution  : 30, 30  (x, y)
## extent      : 594090, 639000, 4190190, 4227540  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=utm +zone=10 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0 
## data source : /private/var/folders/2r/plhpcf711tv__rnpl26vqglh0000gn/T/Rtmpevlkx9/raster/r_tmp_2019-05-26_210733_37829_27734.grd 
## names       : layer 
## values      : -0.9733608, 0.7963593  (min, max)
hist(ndbi,
     main = "Distribution of NDVI-Built values",
     xlab = "NDVI",
     ylab= "Frequency",
     col = "wheat",
     xlim = c(-0.75, 1),
     breaks = 30,
     xaxt = 'n')
## Warning in .hist1(x, maxpixels = maxpixels, main = main, plot = plot, ...):
## 5% of the raster cells were used. 100000 values used.
axis(side=1, at = seq(-0.75,1, 0.05), labels = seq(-0.75,1, 0.05))

hist(ndwi,
     main = "Distribution of NDVI-Water values",
     xlab = "NDVI",
     ylab= "Frequency",
     col = "wheat",
     xlim = c(-0.75, 1),
     breaks = 30,
     xaxt = 'n')
## Warning in .hist1(x, maxpixels = maxpixels, main = main, plot = plot, ...):
## 5% of the raster cells were used. 100000 values used.
axis(side=1, at = seq(-0.75,1, 0.05), labels = seq(-0.75,1, 0.05))

Question 3

Is it possible to find water using thresholding of NDVI or any other indices?

# get the NDVI For Landsat NIR = 5, red = 4
ndvi <- vi(landsat, 5, 4)
water <- reclassify(ndvi, c(-Inf,0, 1,  0, Inf, NA))
plot(water, main = 'Water from Thresholding the Vegetation Index', col = "blue")

Unsupervised Classification

#get the 2011 landsat 5 data
landsat5 <- stack(file.path(myPath, 'rs/centralvalley-2011LT5.tif'))

#name the bands
names(landsat5) <- c('blue', 'green', 'red', 'NIR', 'SWIR1', 'SWIR2')

Question 1

Make a 3-band False Color Composite plot of landsat5.

#plot rgb
plotRGB(landsat5, r = 4, g = 3, b = 2, stretch = "lin", main="Extent False Color Composite", axes = TRUE)


#compute the ndvi w/o a function using straight algebra
ndvi <- 
  (landsat5[['NIR']] - landsat5[['red']]) / (landsat5[['NIR']] + landsat5[['red']])

k-means classification

prepare data

# Extent to crop ndvi layer (this could be any extent in the data)
# but using the example extent here
e <- extent(-121.807, -121.725, 38.004, 38.072)

# crop landsat ndvi by the extent
ndvi <- crop(ndvi, e)
ndvi
## class       : RasterLayer 
## dimensions  : 252, 304, 76608  (nrow, ncol, ncell)
## resolution  : 0.0002694946, 0.0002694946  (x, y)
## extent      : -121.807, -121.725, 38.00413, 38.07204  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0 
## data source : in memory
## names       : layer 
## values      : -0.3360085, 0.7756007  (min, max)
#reproject the raster into a matrix
#we need this to do the k-means classification
nr <- getValues(ndvi)
str(nr)
##  num [1:76608] 0.245 0.236 0.272 0.277 0.277 ...

classify

# It is important to set the seed generator because `kmeans` initiates the centers in random locations
set.seed(99)

# We want to create 10 clusters, allow 500 iterations, start with 5 random sets using "Lloyd" method
kmncluster <- kmeans(na.omit(nr), centers = 10, iter.max = 500, nstart = 5, algorithm="Lloyd")

# kmeans returns an object of class "kmeans"
str(kmncluster)
## List of 9
##  $ cluster     : int [1:76608] 4 4 3 3 3 3 3 4 4 4 ...
##  $ centers     : num [1:10, 1] 0.55425 0.00498 0.29997 0.20892 -0.20902 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:10] "1" "2" "3" "4" ...
##   .. ..$ : NULL
##  $ totss       : num 6459
##  $ withinss    : num [1:10] 5.69 6.13 4.91 4.9 5.75 ...
##  $ tot.withinss: num 55.8
##  $ betweenss   : num 6403
##  $ size        : int [1:10] 8932 4550 7156 6807 11672 8624 8736 5040 9893 5198
##  $ iter        : int 108
##  $ ifault      : NULL
##  - attr(*, "class")= chr "kmeans"
# Use the ndvi object to set the cluster values to a new raster
knr <- setValues(ndvi, kmncluster$cluster)

#create a color pallete for the 10 clusters
mycolor <- c("#fef65b","#ff0000", "#daa520","#0000ff","#0000ff","#00ff00","#cbbeb5",
             "#c3ff5b", "#ff7373", "#00ff00", "#808080")

#plot
par(mfrow = c(1,2))
plot(ndvi, col = rev(terrain.colors(10)), main = 'Landsat-NDVI')
plot(knr, main = 'Unsupervised classification', col = mycolor )

Question 2

Plot 3-band RGB of landsat5 for the subset (extent e) and result of kmeans clustering side-by-side and make a table of land-use land-cover labels for the clusters. E.g. cluster 4 and 5 are water.

# crop landsat 5 to extent e
landsat5_e <- crop(landsat5, e)

# plot
par(mfrow = c(1,2))

plotRGB(landsat5_e, "red", "green", "blue", stretch = "lin", axes = T, main = "landsat5 true color")
plot(knr, main = 'Unsupervised classification', col = mycolor )

Cluster Land-Use/Cover
1 Vegetation
2 Urban
3 Urban
4 Water
5 Water
6 Vegetation
7 Water
8 Vegetation
9 Vegetation
10 Urban

Supervised Classification

Load up

# load in national land cover database raster
nlcd <- brick('data/rs/nlcd-L1.tif')

# has 2001 and 2011 data
names(nlcd) <- c("nlcd2001", "nlcd2011")

# The class names and colors for plotting
nlcdclass <- c("Water", "Developed", "Barren", "Forest", "Shrubland", "Herbaceous", "Planted/Cultivated", "Wetlands")
classdf <- data.frame(classvalue1 = c(1,2,3,4,5,7,8,9), classnames1 = nlcdclass)

# Hex codes of colors for plotting
classcolor <- c("#5475A8", "#B50000", "#D2CDC0", "#38814E", "#AF963C", "#D1D182", "#FBF65D", "#C8E6F8")

# Now we ratify (RAT = "Raster Attribute Table") the ncld2011 (define RasterLayer as a categorical variable). This is helpful for plotting.
nlcd2011 <- nlcd[[2]] #2011 data
nlcd2011 <- ratify(nlcd2011) 
rat <- levels(nlcd2011)[[1]]
rat$landcover <- nlcdclass
levels(nlcd2011) <- rat

generate sampled sites

# 2011 nlcd sample 200 samples
samp2011 <- sampleStratified(nlcd2011, size = 200, na.rm = TRUE, sp = TRUE)

# plot sample sites over raster to visualize
plt <- levelplot(nlcd2011, col.regions = classcolor, 
                 main = 'Distribution of Training Sites')
print(plt + layer(sp.points(samp2011, pch = 3, cex = 0.5, col = 1)))

extract values from landsat5 for training sites. these band values will be predictor variables and “classvalues” from nlcd2011 will be response variable

# using uncropped landsat5 data

# Extract the layer values for the locations
sampvals <- extract(landsat5, samp2011, df = TRUE)

# sampvals no longer has the spatial information. To keep the spatial information you use `sp=TRUE` argument in the `extract` function.

# drop the ID column
sampvals <- sampvals[, -1]

# combine the class information with extracted values for each sample point
sampdata <- data.frame(classvalue = samp2011@data$nlcd2011, sampvals)

head(sampdata)
##   classvalue       blue      green        red        NIR      SWIR1
## 1          1 0.09952746 0.08033110 0.06037629 0.04313058 0.02056747
## 2          1 0.10533331 0.08517402 0.06796144 0.05823631 0.04033335
## 3          1 0.10034013 0.07658263 0.05747302 0.04934990 0.02022632
## 4          1 0.11089675 0.09958038 0.08909610 0.27276725 0.16273591
## 5          1 0.11717977 0.10284505 0.08171622 0.05237862 0.02773173
## 6          1 0.12077010 0.11270922 0.09771536 0.06949051 0.02774821
##        SWIR2
## 1 0.01930458
## 2 0.02872942
## 3 0.01196331
## 4 0.09231622
## 5 0.01968904
## 6 0.02204543

train the classifier using the sample LULC classes from NLCD

# Train the model using rpart() where the LULC class prediction is a function of each band
cart <- rpart(as.factor(classvalue)~., data=sampdata, method = 'class', minsplit = 5)

# Plot the trained classification tree
plot(cart, uniform=TRUE, main="Classification Tree")
text(cart, cex = 0.8)

classify the rest of the raster landsat5 pixels using the trained prediciton model

#get predictions from the class tree
pr2011 <- predict(landsat5, cart, type='class')

# plot
pr2011 <- ratify(pr2011)
rat <- levels(pr2011)[[1]]
rat$legend <- classdf$classnames
levels(pr2011) <- rat
levelplot(pr2011, maxpixels = 1e6,
          col.regions = classcolor,
          scales=list(draw=FALSE),
          main = "Decision Tree classification of Landsat 5")

Question 1

Plot nlcd2011 and pr2011 side-by-side and comment about the accuracy of the prediction (e.g. mixing between cultivated crops, pasture, grassland and shrubs).

# I set the colorkey to false so the plots actually show at a legible size
p1 <- levelplot(nlcd2011, maxpixels = 1e6,
          col.regions = classcolor,
          scales=list(draw=FALSE),
          main = "NLCD2011", colorkey = FALSE)


p2 <- levelplot(pr2011, maxpixels = 1e6,
          col.regions = classcolor,
          scales=list(draw=FALSE),
          main = "PR2011", colorkey = FALSE)

grid.arrange(p1, p2, ncol = 2)

The predictions here are okay, but aren’t totally accurate for any land use / land cover types. For example, waters and wetlands as a whole are pretty accurate, but between water and wetland, there is some discrepancy probably due to the similar spectral signatures. Similarly, the vast amount of cultivated land has predictions in the developed, herbaceaous and wetlands. This is again probably due to spectral signature similarity. The urban areas are mixed up probably due to spectral signatures along with multiple categorical density.


Model evaluation using k-fold cross validation Evaluation is done using the original sample points

# get 5 k-folds for cross validation
j <- kfold(sampdata, k = 5, by=sampdata$classvalue)

# create a list for the loop
x <- list()

# do the class tree 5 times with the test and train data for each fold
for (k in 1:5) {
    train <- sampdata[j!= k, ]
    test <- sampdata[j == k, ]
    cart <- rpart(as.factor(classvalue)~., data=train, method = 'class', minsplit = 5)
    pclass <- predict(cart, test, type='class')
    # create a data.frame using the reference and prediction
    x[[k]] <- cbind(test$classvalue, as.integer(pclass))
}

combine results of kfold test and compute confusion matrix

# create df of kfold results, showing obs/pred values for all points 5x
y <- do.call(rbind, x)
y <- data.frame(y)
colnames(y) <- c('observed', 'predicted')

# table the df to make confusion matrix
conmat <- table(y)

# change the name of the classes
colnames(conmat) <- classdf$classnames
rownames(conmat) <- classdf$classnames

conmat
##                     predicted
## observed             Water Developed Barren Forest Shrubland Herbaceous
##   Water                174         3      0      2         4          0
##   Developed              2        96     35      8        10         38
##   Barren                 5        37     59      2        13         72
##   Forest                 0         2      1     94        61          8
##   Shrubland              0         5      2     49       110         11
##   Herbaceous             0        15     46      2        11        112
##   Planted/Cultivated     0        11      9     22        46         39
##   Wetlands              16         7      3     28        36          9
##                     predicted
## observed             Planted/Cultivated Wetlands
##   Water                               5       12
##   Developed                           7        4
##   Barren                             10        2
##   Forest                             17       17
##   Shrubland                          18        5
##   Herbaceous                         14        0
##   Planted/Cultivated                 65        8
##   Wetlands                           32       69

##Question 2

Comment on the miss-classification between different classes.

First, let’s see which categories were easy and hard to classify.

#get the actual vs total predictions
as.data.frame(diag(conmat) / rowSums(conmat))
##                    diag(conmat)/rowSums(conmat)
## Water                                     0.870
## Developed                                 0.480
## Barren                                    0.295
## Forest                                    0.470
## Shrubland                                 0.550
## Herbaceous                                0.560
## Planted/Cultivated                        0.325
## Wetlands                                  0.345

From the accuracy results we can see that water is pretty easily identified, while the remaining categories have a best prediction rate of only 51.5%. It would seem wetland and cultivated are the harded to identify, both with 33.5% accuracy. Wetlands is misclassed as shrubland, cultivated and forest most often, while cultivated is misclassed as shrubland and herbaceous most often.

Question 3

Can you think of ways to to improve the accuracy.

Perhaps using a vegetatation index when making the training / test predictions would improve accuracy.

Question 4

Perform the classification using Random Forest classifiers from the randomForest package

#this is basically a repeat of the rpart but with randomForest
# use randomforest for a fit model
rf <- randomForest(as.factor(classvalue)~., data=sampdata, method = 'class', minsplit = 5)

# get rf predictions with the new model
pr2011.rf <- predict(landsat5, rf, type='class')

# awww rats
pr2011.rf <- ratify(pr2011.rf)
rat <- levels(pr2011.rf)[[1]]
rat$legend <- classdf$classnames
levels(pr2011.rf) <- rat

# plot
levelplot(pr2011.rf, maxpixels = 1e6,
          col.regions = classcolor,
          scales=list(draw=FALSE),
          main = "Random Forest")

#again make the plot with colorkey=false so they fit side by side
rfp <- levelplot(pr2011.rf, maxpixels = 1e6,
          col.regions = classcolor,
          scales=list(draw=FALSE),
          main = "Random Forest",
          colorkey = FALSE)

Question 5

Plot the results of rpart and Random Forest classifier side-by-side.

#p2 is the rpart and rfp is the random forest
grid.arrange(p2, rfp, ncol = 2)

Question 6

__(optional):Repeat the steps for the year 2001 using Random Forest. Use cloud-free composite image from Landsat 7 with 6-bands. Use as reference data the National Land Cover Database 2001 (NLCD 2001) for the subset of the California Central Valley.*__

#load the 2001 landsat data
landsat7 <- stack(file.path(myPath, 'rs/centralvalley-2001LE7.tif'))

#name the bands
names(landsat7) <- c('blue', 'green', 'red', 'NIR', 'SWIR1', 'SWIR2')
#get the 2001 nlcd data from the data we loaded earlier
nlcd2001 <- nlcd[[1]]

#ratify it
nlcd2001 <- ratify(nlcd2001)
rat <- levels(nlcd2001)[[1]]
rat$landcover <- nlcdclass
levels(nlcd2001) <- rat
#get sample data from 2001 nlcd data
samp2001 <- sampleStratified(nlcd2001, size = 200, na.rm = TRUE, sp = TRUE)

# using uncropped landsat7 data

# Extract the layer values for the locations
sampvals <- extract(landsat7, samp2001, df = TRUE)

# drop the ID column
sampvals <- sampvals[, -1]

# combine the class information with extracted values for each sample point
sampdata <- data.frame(classvalue = samp2001@data$nlcd2001, sampvals)

head(sampdata)
##   classvalue      blue      green        red        NIR      SWIR1
## 1          1 0.1243593 0.11331026 0.10211341 0.05622215 0.02377453
## 2          1 0.1187809 0.10258236 0.08572009 0.04189457 0.01534228
## 3          1 0.1046298 0.08580485 0.07627758 0.14863987 0.09102381
## 4          1 0.1049763 0.08234032 0.06441287 0.06089962 0.02823475
## 5          1 0.1248476 0.11039074 0.10338834 0.06596798 0.03687114
## 6          1 0.1115839 0.08528487 0.06251894 0.04269759 0.02180964
##        SWIR2
## 1 0.01734177
## 2 0.01223762
## 3 0.05049356
## 4 0.01824325
## 5 0.02548085
## 6 0.01326413
#random forest fit
rf <- randomForest(as.factor(classvalue)~., data=sampdata, method = 'class', minsplit = 5)

# use rf model to predict values
pr2001.rf <- predict(landsat7, rf, type='class')

# ratify it
pr2001.rf <- ratify(pr2001.rf)
rat <- levels(pr2001.rf)[[1]]
rat$legend <- classdf$classnames
levels(pr2001.rf) <- rat

levelplot(pr2001.rf, maxpixels = 1e6,
          col.regions = classcolor,
          scales=list(draw=FALSE),
          main = "Random Forest 2001 Landsat 7")

# plot
rfp2 <- levelplot(pr2001.rf, maxpixels = 1e6,
          col.regions = classcolor,
          scales=list(draw=FALSE),
          main = "Random Forest 2001", 
          colorkey = FALSE)

Compare to the 2011

grid.arrange(rfp, rfp2, ncol = 2)